In [ ]:
# Basic libraries import
import numpy as np
import pandas as pd
import seaborn as sns
import h5py
from datetime import datetime
import os
# Plotting
%matplotlib notebook
#%matplotlib inline
sns.set_context("notebook", font_scale=1.5)
In [ ]:
# Read the ATM dh/dt CSV that lives in the sibling "data" directory.
filename = "IDHDT4_2015-2014_ATM_dhdt_canada.csv"
csv_path = os.path.join(os.path.pardir, "data", filename)
data = pd.read_csv(csv_path)
In [ ]:
# Show the first rows of the loaded table as a quick sanity check.
data.head()
In [ ]:
def convert_to_df(dirpath, filename):
    """Load one HDF5 file into a DataFrame of point measurements.

    The ``longitude``, ``elevation`` and ``latitude`` datasets are read in
    full and become columns of the same name. A constant ``datetime`` column
    is added, parsed from the timestamp embedded in the file name.

    Args:
        dirpath: Directory that contains the file.
        filename: File name of the form ``<prefix>_YYYYMMDD_HHMMSS.<ext>``;
            the text between the first underscore and the first following
            dot must match ``%Y%m%d_%H%M%S``.

    Returns:
        pandas.DataFrame with columns ``longitude``, ``elevation``,
        ``latitude`` and ``datetime`` (in that order).

    Raises:
        IndexError: If ``filename`` contains no underscore.
        ValueError: If the embedded timestamp does not match the format.
        KeyError: If one of the three datasets is missing from the file.
    """
    path = os.path.join(dirpath, filename)
    # Open explicitly read-only: older h5py releases default to append mode,
    # which can modify the input or create a file when the path is wrong.
    with h5py.File(path, 'r') as f:
        # `[:]` copies each dataset into memory, so the frame stays valid
        # after the file is closed.
        df = pd.DataFrame({
            'longitude': f['longitude'][:],
            'elevation': f['elevation'][:],
            'latitude': f['latitude'][:],
        })
    # Drop the prefix before the first '_' and the extension after the first '.'.
    stamp = filename.split('_', 1)[1].split('.')[0]
    df['datetime'] = datetime.strptime(stamp, '%Y%m%d_%H%M%S')
    return df
In [ ]:
# Convert every file in the level-data directory into its own DataFrame.
dirpath = os.path.join(os.path.pardir, "data", 'leveldata_1')
dfs = []
# os.listdir() returns names in arbitrary order; sorting keeps the row order
# of the concatenated result reproducible across runs and machines.
for filename in sorted(os.listdir(dirpath)):
    dfs.append(convert_to_df(dirpath, filename))
In [ ]:
# Stack the per-file frames into one table with a fresh 0..n-1 index.
res_df = pd.concat(dfs, ignore_index=True)
# NOTE(review): both output paths below are empty strings, so these exports
# cannot write to a real file as written — fill in the intended destination
# (the later IRMCR2 cells write under ../data) before running this cell.
res_df.to_csv('', index_label=False)
res_df.to_json('', orient = 'records')
In [ ]:
def convert_input(dirpath, filename):
    """Load one CSV file and reduce it to valid, good-quality rows.

    Rows are kept only when both ``THICK`` and ``SURFACE`` are greater than
    -9998 and ``QUALITY`` equals 1. The columns ``TIME``, ``FRAME``,
    ``QUALITY``, ``SURFACE``, ``ELEVATION`` and ``BOTTOM`` are dropped, and a
    constant ``DATE`` column is appended, parsed from the file name.

    Args:
        dirpath: Directory that contains the file.
        filename: File name whose second underscore-separated field is a
            ``YYYYMMDD`` date.

    Returns:
        pandas.DataFrame holding the remaining columns followed by ``DATE``;
        the index labels of the surviving rows are those of the raw file.

    Raises:
        IndexError: If ``filename`` contains no underscore.
        ValueError: If the date field does not match ``%Y%m%d``.
        KeyError: If a required column is missing from the CSV.
    """
    raw = pd.read_csv(os.path.join(dirpath, filename))
    # add date from filename
    date = datetime.strptime(filename.split('_')[1], '%Y%m%d')
    # filter invalid entries and keep only quality == 1 in a single pass
    keep = (raw['THICK'] > -9998) & (raw['SURFACE'] > -9998) & (raw['QUALITY'] == 1)
    unused = ['TIME', 'FRAME', 'QUALITY', 'SURFACE', 'ELEVATION', 'BOTTOM']
    # drop() and assign() both return new frames, so the DATE column is never
    # written onto a filtered slice of `raw` (avoids SettingWithCopyWarning).
    return raw.loc[keep].drop(columns=unused).assign(DATE=date)
In [ ]:
# load all data
dirpath = os.path.join(os.path.pardir, "data", 'IRMCR2_south')
dfs = []
# os.listdir() returns names in arbitrary order; sorting keeps the row order
# of the concatenated result reproducible across runs and machines.
for filename in sorted(os.listdir(dirpath)):
    dfs.append(convert_input(dirpath, filename))
In [ ]:
# Stack the per-file frames into one table, then switch to the lower-case
# column names used by the exports; index labels are converted with str().
column_names = {'LAT': 'lat', 'LON': 'lon', 'THICK': 'thickness', 'DATE': 'date'}
res_df = pd.concat(dfs, ignore_index=True)
res_df = res_df.rename(index=str, columns=column_names)
In [ ]:
# Write the combined table next to the inputs, once as CSV and once as a
# JSON array of row records.
out_dir = os.path.join(os.path.pardir, 'data')
res_df.to_csv(os.path.join(out_dir, 'IRMCR2_south.csv'), index_label=False)
res_df.to_json(os.path.join(out_dir, 'IRMCR2_south.json'), orient='records')
In [ ]:
# Mean thickness for each (year, month) pair taken from the 'date' column.
date_parts = [res_df['date'].dt.year, res_df['date'].dt.month]
thick_avg = res_df.groupby(date_parts)[['thickness']].mean()
# Both grouping keys derive from 'date', so give the index levels real names.
thick_avg.index.names = ['year', 'month']
# Flatten year/month back into columns and export one JSON record per month.
thick_json_path = os.path.join(os.path.pardir, 'data', 'IRMCR2_south_thick.json')
thick_avg.reset_index().to_json(thick_json_path, orient='records')
In [ ]:
# Ice-thickness text file to load from the data/ice_thickness directory.
filename = 'IR2HI2_2013010_CLH_JKB2h_X18a_icethk.txt'
In [ ]:
# Parse the whitespace-delimited ice-thickness table, ignoring '#' comments.
# The separator is a raw string so the backslash is not an invalid escape,
# and the '+' makes a run of spaces/tabs count as one delimiter instead of
# producing empty columns.
pd.read_csv(
    os.path.join(os.path.pardir, "data", 'ice_thickness', filename),
    comment='#',
    sep=r'\s+',
)